# Source of ratings: https://www.whoscored.com/ - date: 22/4/22
# Load the scraped ratings. Text columns stay character (not factor) so that
# Rating can be compared as a string below; `FALSE` is spelled out because
# `F` is an ordinary variable that can be reassigned.
data <- read.csv("Original_Scrape.csv", stringsAsFactors = FALSE)

# Rows whose Rating is exactly the string "10.00".
Perfect_Scores <- data %>%
  filter(Rating == "10.00")

# Show the perfect-score rows as an interactive table.
DT::datatable(Perfect_Scores)
<<<<<<< HEAD
=======
>>>>>>> ad691fee9c20e6404d06b45c8080a3fe7cc5c998
# Clean the raw scrape in a single pipeline:
#   * Player.1 is split at the comma into Name and Starting_Position
#   * rows whose Rating is "-" are dropped and Rating becomes numeric
#   * Name2     = Name with any parenthesised fragment removed
#   * age       = first run of digits found in Name2
#   * NameFinal = Name2 with digits removed (removeNumbers) and whitespace
#                 trimmed
#   * date is split at the comma into day and date, both trimmed
data_clean <- data %>%
  separate(Player.1, into = c("Name", "Starting_Position"), sep = ",") %>%
  filter(Rating != "-") %>%
  mutate(
    Rating = as.numeric(Rating),
    Starting_Position = trimws(Starting_Position),
    Name2 = gsub(pattern = "\\(([^()]+)\\)", replacement = "", x = Name),
    age = as.numeric(str_extract(Name2, pattern = "[:digit:]+")),
    NameFinal = trimws(removeNumbers(Name2))
  ) %>%
  separate(date, into = c("day", "date"), sep = ",") %>%
  mutate(
    day = trimws(day),
    date = trimws(date)
  )

# Every parenthesised fragment of Name, one column per match within a row.
# NOTE(review): presumably substitution minutes; not used in the visible code.
sub_mins <- data.frame(
  str_extract_all(
    data_clean$Name,
    pattern = "\\(([^()]+)\\)",
    simplify = TRUE
  )
)
# Per-player summary columns, attached to every match row of that player:
#   numberofmatches - number of rated rows for the player
#   mean            - mean Rating
#   deviation       - Rating minus the player's mean
#   square          - squared deviation
#   square_sum      - sum of squared deviations
#   variance        - square_sum / (numberofmatches - 1); NaN for a single row
data_clean <- data_clean %>%
  group_by(NameFinal) %>%
  mutate(
    numberofmatches = n(),
    mean = mean(Rating),
    deviation = Rating - mean,
    square = deviation^2,
    square_sum = sum(square),
    variance = square_sum / (numberofmatches - 1)
  ) %>%
  ungroup()

# Distinct combinations of the summary columns.
# NOTE(review): a player whose age or squad differs between rows will appear
# more than once here.
stats <- unique(
  select(data_clean, NameFinal, variance, mean, numberofmatches, age, squad)
)
# Same summary as `stats`, with an extra column that buckets each starting
# position code into a broader group. Codes not listed below become NA.
stats_pos <- data_clean %>%
  mutate(
    position = case_when(
      Starting_Position %in% c("AMC", "AML", "AMR") ~ "Attacking Midfielders",
      Starting_Position %in% c("DC", "DL", "DR") ~ "Defenders",
      Starting_Position %in% c("DMC", "DML", "DMR") ~ "Defensive Midfielders",
      Starting_Position %in% c("FW", "FWL", "FWR") ~ "Forwards",
      Starting_Position %in% "GK" ~ "GoalKeepers",
      Starting_Position %in% c("MC", "MR", "ML") ~ "Midfielders",
      Starting_Position %in% "Sub" ~ "Subs"
    )
  ) %>%
  select(NameFinal, variance, mean, numberofmatches, age, squad, position) %>%
  unique()

# Keep only players with at least 10 rated matches.
stats_10 <- filter(stats, numberofmatches > 9)
stats_pos_10 <- filter(stats_pos, numberofmatches > 9)
# Four corner labels for the mean-vs-variance scatter plots. Infinite x/y
# values pin each label to a plot corner; hjust/vjust push the text inwards.
# Row order: bottom-left, top-left, bottom-right, top-right.
annotations <- data.frame(
  xpos = rep(c(-Inf, Inf), each = 2),
  ypos = rep(c(-Inf, Inf), times = 2),
  annotateText = c(
    "Consistently Shit",
    "Inconsistently shit?",
    "Consistently Good",
    "Inconsistently Good?"
  ),
  hjustvar = rep(c(0, 1), each = 2),
  vjustvar = rep(c(-0.5, 1), times = 2)
)
# Mean rating vs variance for players with at least 10 appearances
# (substitute appearances included), with corner annotations in red.
stats_10 %>%
  ggplot(aes(x = mean, y = variance)) +
  geom_point() +
  theme_classic() +
  labs(
    x = "Average Rating",
    y = "Variance",
    title = "Average Rating v Variance",
    subtitle = "At least 10 appearances (inc Subs)"
  ) +
  geom_text_repel(aes(label = NameFinal)) +
  geom_text(
    data = annotations,
    aes(
      x = xpos, y = ypos,
      hjust = hjustvar, vjust = vjustvar,
      label = annotateText
    ),
    color = "red"
  )
<<<<<<< HEAD
# Per-player statistics recomputed on starting appearances only: rows with
# Starting_Position "Sub" are dropped before counting matches and computing
# the mean and sample variance of Rating.
data_clean_no_subs <- data_clean %>%
  filter(Starting_Position != "Sub") %>%
  group_by(NameFinal) %>%
  mutate(
    numberofmatches = n(),
    mean = mean(Rating),
    deviation = Rating - mean,
    square = deviation^2,
    square_sum = sum(square),
    variance = square_sum / (numberofmatches - 1)
  ) %>%
  ungroup() %>%
  select(NameFinal, variance, mean, numberofmatches, age, squad) %>%
  unique()

# Players with at least 10 starts.
stats_no_subs_10 <- data_clean_no_subs %>%
  filter(numberofmatches > 9)

# Fix: this plot used to draw `stats_10` (which includes sub appearances), so
# it was identical to the "inc Subs" plot. It now uses the no-subs statistics
# computed above, matching its subtitle.
ggplot(stats_no_subs_10, aes(x = mean, y = variance)) +
  geom_point() +
  theme_classic() +
  labs(
    x = "Average Rating",
    y = "Variance",
    title = "Average Rating v Variance",
    subtitle = "At least 10 appearances (exc Subs)"
  ) +
  geom_text_repel(aes(label = NameFinal)) +
  geom_text(
    data = annotations,
    aes(
      x = xpos, y = ypos,
      hjust = hjustvar, vjust = vjustvar,
      label = annotateText
    ),
    color = "red"
  )
<<<<<<< HEAD
# Per-player statistics on substitute appearances only (Starting_Position is
# "Sub"): match count, mean Rating and sample variance of Rating.
Subs_version <- data_clean %>%
  filter(Starting_Position == "Sub") %>%
  group_by(NameFinal) %>%
  mutate(
    numberofmatches = n(),
    mean = mean(Rating),
    deviation = Rating - mean,
    square = deviation^2,
    square_sum = sum(square),
    variance = square_sum / (numberofmatches - 1)
  ) %>%
  ungroup()

# One row per distinct combination of the summary columns.
stats_Subs <- unique(
  select(Subs_version, NameFinal, variance, mean, numberofmatches, age, squad)
)

# Players with at least 5 substitute appearances.
stats_Subs_5 <- filter(stats_Subs, numberofmatches > 4)

# Mean vs variance for regular substitutes; ylim() removes points whose
# variance lies outside [-0.1, 1].
stats_Subs_5 %>%
  ggplot(aes(x = mean, y = variance)) +
  geom_point() +
  theme_classic() +
  labs(
    x = "Average Rating",
    y = "Variance",
    title = "Average Rating v Variance",
    subtitle = "At least 5 sub appearances"
  ) +
  geom_text_repel(aes(label = NameFinal)) +
  geom_text(
    data = annotations,
    aes(
      x = xpos, y = ypos,
      hjust = hjustvar, vjust = vjustvar,
      label = annotateText
    ),
    color = "red"
  ) +
  ylim(-0.1, 1)
<<<<<<< HEAD
# Mean vs variance for attacking midfielders with at least 10 appearances.
# The position subset is built once and shared by the point and label layers.
stats_pos_10 %>%
  filter(position == "Attacking Midfielders") %>%
  ggplot(aes(x = mean, y = variance)) +
  geom_point() +
  theme_classic() +
  labs(
    x = "Average Rating",
    y = "Variance",
    title = "Average Rating v Variance - Attacking Midfielders",
    subtitle = "At least 10 appearances"
  ) +
  geom_text_repel(aes(label = NameFinal)) +
  geom_text(
    data = annotations,
    aes(
      x = xpos, y = ypos,
      hjust = hjustvar, vjust = vjustvar,
      label = annotateText
    ),
    color = "red"
  ) +
  ylim(-0.1, 1.3)
<<<<<<< HEAD
# Mean vs variance for defenders with at least 10 appearances.
# The position subset is built once and shared by the point and label layers.
stats_pos_10 %>%
  filter(position == "Defenders") %>%
  ggplot(aes(x = mean, y = variance)) +
  geom_point() +
  theme_classic() +
  labs(
    x = "Average Rating",
    y = "Variance",
    title = "Average Rating v Variance - Defenders",
    subtitle = "At least 10 appearances"
  ) +
  geom_text_repel(aes(label = NameFinal)) +
  geom_text(
    data = annotations,
    aes(
      x = xpos, y = ypos,
      hjust = hjustvar, vjust = vjustvar,
      label = annotateText
    ),
    color = "red"
  ) +
  ylim(-0.1, 1.3)
<<<<<<< HEAD
# Mean vs variance for defensive midfielders with at least 10 appearances.
# The position subset is built once and shared by the point and label layers.
stats_pos_10 %>%
  filter(position == "Defensive Midfielders") %>%
  ggplot(aes(x = mean, y = variance)) +
  geom_point() +
  theme_classic() +
  labs(
    x = "Average Rating",
    y = "Variance",
    title = "Average Rating v Variance - Defensive Midfielders",
    subtitle = "At least 10 appearances"
  ) +
  geom_text_repel(aes(label = NameFinal)) +
  geom_text(
    data = annotations,
    aes(
      x = xpos, y = ypos,
      hjust = hjustvar, vjust = vjustvar,
      label = annotateText
    ),
    color = "red"
  ) +
  ylim(-0.1, 1.3)
<<<<<<< HEAD
# Mean vs variance for forwards with at least 10 appearances. This plot uses
# a taller y-range (up to 2.3) than the other position plots.
stats_pos_10 %>%
  filter(position == "Forwards") %>%
  ggplot(aes(x = mean, y = variance)) +
  geom_point() +
  theme_classic() +
  labs(
    x = "Average Rating",
    y = "Variance",
    title = "Average Rating v Variance - Forwards",
    subtitle = "At least 10 appearances"
  ) +
  geom_text_repel(aes(label = NameFinal)) +
  geom_text(
    data = annotations,
    aes(
      x = xpos, y = ypos,
      hjust = hjustvar, vjust = vjustvar,
      label = annotateText
    ),
    color = "red"
  ) +
  ylim(-0.1, 2.3)
# Mean vs variance for goalkeepers with at least 10 appearances.
# The position subset is built once and shared by the point and label layers.
stats_pos_10 %>%
  filter(position == "GoalKeepers") %>%
  ggplot(aes(x = mean, y = variance)) +
  geom_point() +
  theme_classic() +
  labs(
    x = "Average Rating",
    y = "Variance",
    title = "Average Rating v Variance - GoalKeepers",
    subtitle = "At least 10 appearances"
  ) +
  geom_text_repel(aes(label = NameFinal)) +
  geom_text(
    data = annotations,
    aes(
      x = xpos, y = ypos,
      hjust = hjustvar, vjust = vjustvar,
      label = annotateText
    ),
    color = "red"
  ) +
  ylim(-0.1, 1.3)
<<<<<<< HEAD
# Mean vs variance for midfielders with at least 10 appearances.
# The position subset is built once and shared by the point and label layers.
stats_pos_10 %>%
  filter(position == "Midfielders") %>%
  ggplot(aes(x = mean, y = variance)) +
  geom_point() +
  theme_classic() +
  labs(
    x = "Average Rating",
    y = "Variance",
    title = "Average Rating v Variance - Midfielders",
    subtitle = "At least 10 appearances"
  ) +
  geom_text_repel(aes(label = NameFinal)) +
  geom_text(
    data = annotations,
    aes(
      x = xpos, y = ypos,
      hjust = hjustvar, vjust = vjustvar,
      label = annotateText
    ),
    color = "red"
  ) +
  ylim(-0.1, 1.3)
<<<<<<< HEAD
# Mean vs variance for players with at least 10 appearances, one panel per
# squad stacked in a single column. `annotations` has no squad column, so the
# corner labels are repeated in every panel.
stats_10 %>%
  ggplot(aes(x = mean, y = variance)) +
  geom_point() +
  theme_classic() +
  labs(
    x = "Average Rating",
    y = "Variance",
    title = "Average Rating v Variance By Teams",
    subtitle = "At least 10 appearances"
  ) +
  ylim(-0.1, 1.1) +
  geom_text_repel(aes(label = NameFinal), size = 3) +
  facet_wrap(~squad, ncol = 1) +
  geom_text(
    data = annotations,
    aes(
      x = xpos, y = ypos,
      hjust = hjustvar, vjust = vjustvar,
      label = annotateText
    ),
    color = "red"
  )
<<<<<<< HEAD
## Players in positions
# Per player / starting position / squad statistics (substitute rows excluded).
#   numberofmatches_in_position - rows for this player in this position
#   percentageinpostion         - share of the player's total matches
#                                 (numberofmatches, computed earlier over all
#                                 rows) played in this position, as a
#                                 proportion rounded to 2 decimals
#   mean / variance             - mean and sample variance of Rating within
#                                 the position
#
# Fix: the sum of squares is taken within the position group, so the sample
# variance must divide by (numberofmatches_in_position - 1). Dividing by the
# player's overall (numberofmatches - 1) shrank the variance in proportion to
# how rarely the position was played. Groups with a single match give NaN.
data_clean_positions <- data_clean %>%
  filter(Starting_Position != "Sub") %>%
  group_by(NameFinal, Starting_Position, squad) %>%
  mutate(
    numberofmatches_in_position = n(),
    percentageinpostion = round(
      numberofmatches_in_position / numberofmatches,
      2
    ),
    mean = mean(Rating),
    deviation = Rating - mean,
    square = deviation^2,
    square_sum = sum(square),
    variance = square_sum / (numberofmatches_in_position - 1)
  ) %>%
  ungroup()
# One row per distinct player/position summary, restricted to positions the
# player has started in at least 5 times. Filtering before de-duplicating
# yields the same rows in the same order as de-duplicating first.
stats_postions2 <- data_clean_positions %>%
  filter(numberofmatches_in_position > 4) %>%
  select(
    NameFinal, variance, mean, numberofmatches, age, squad,
    Starting_Position, numberofmatches_in_position, percentageinpostion
  ) %>%
  unique()
# Variance against the share of a player's matches spent in the position.
# The x values are proportions (0-1) even though the axis label says "%".
stats_postions2 %>%
  ggplot(aes(x = percentageinpostion, y = variance)) +
  geom_point() +
  theme_classic() +
  labs(
    x = "% of Matches in Position",
    y = "Variance",
    title = "Variance vs % of matches in Position",
    subtitle = "At Least 5 matches in a position"
  )
# Mean vs variance per player/position, one panel per starting position
# stacked in a single column; corner labels are repeated in every panel.
stats_postions2 %>%
  ggplot(aes(x = mean, y = variance)) +
  geom_point() +
  theme_classic() +
  labs(
    x = "Average Rating",
    y = "Variance",
    title = "Average Rating v Variance By Position",
    subtitle = "At least 5 appearances in position"
  ) +
  ylim(-0.1, 1.1) +
  geom_text_repel(aes(label = NameFinal), size = 3) +
  facet_wrap(~Starting_Position, ncol = 1) +
  geom_text(
    data = annotations,
    aes(
      x = xpos, y = ypos,
      hjust = hjustvar, vjust = vjustvar,
      label = annotateText
    ),
    color = "red"
  )
<<<<<<< HEAD
# For each player, count how many different starting positions (excluding
# "Sub") they have played, keeping players with at least 10 rated matches.
#
# Changes from the previous version:
#   * different_position uses n_distinct(Starting_Position) instead of n().
#     n() counted de-duplicated rows, which over-counts if another selected
#     column (e.g. age) differs between rows of the same position. When rows
#     are already unique per position the result is unchanged.
#   * different_position_text is as.character(different_position) rather than
#     a case_when() hard-coded for 1..8, which returned NA for 9 or more
#     positions. Values 1..8 give the same strings as before.
stats_positions3 <- data_clean %>%
  select(NameFinal, variance, mean, numberofmatches, age, Starting_Position) %>%
  filter(Starting_Position != "Sub") %>%
  unique() %>%
  group_by(NameFinal) %>%
  mutate(
    different_position = n_distinct(Starting_Position),
    different_position_text = as.character(different_position)
  ) %>%
  ungroup() %>%
  filter(numberofmatches > 9)
# Mean vs variance, one panel per number of distinct starting positions.
# Fix: the subtitle said "At least 5 Matches", but stats_positions3 is
# filtered with numberofmatches > 9, i.e. at least 10 matches.
ggplot(data = stats_positions3) +
  geom_point(aes(x = mean, y = variance)) +
  theme_classic() +
  labs(
    title = "Checking consistancy when playing in multiple positions",
    subtitle = "At least 10 Matches"
  ) +
  facet_wrap(~different_position_text, ncol = 2)
### It seems that the more positions you play, the more consistent you are, but the lower your average rating
# Opponents treated as the "top 6"; matched against the `opp` column.
top6 <- c(
  "Arsenal",
  "Chelsea",
  "Liverpool",
  "Manchester City",
  "Manchester United",
  "Tottenham"
)

# Per-player statistics restricted to matches against top-6 opponents. The
# summary columns inherited from data_clean are recomputed on this subset,
# then players with fewer than 5 such matches are dropped.
top6_data_clean <- data_clean %>%
  filter(opp %in% top6) %>%
  group_by(NameFinal) %>%
  mutate(
    numberofmatches = n(),
    mean = mean(Rating)
  ) %>%
  mutate(
    deviation = Rating - mean,
    square = deviation^2
  ) %>%
  mutate(
    square_sum = sum(square),
    variance = square_sum / (numberofmatches - 1)
  ) %>%
  ungroup() %>%
  filter(numberofmatches > 4)
# Overall per-player summary, rebuilt from data_clean with the same columns
# used when `stats` was first created.
stats <- unique(
  select(data_clean, NameFinal, variance, mean, numberofmatches, age, squad)
)

# Per-player summary for matches against top-6 opponents.
top6_stats <- unique(
  select(top6_data_clean, NameFinal, variance, mean, numberofmatches)
)
# Mean vs variance of ratings against top-6 opponents (players with at least
# 5 such appearances). Tighter label padding lets more names be placed.
top6_stats %>%
  ggplot(aes(x = mean, y = variance)) +
  geom_point() +
  theme_classic() +
  geom_text_repel(aes(label = NameFinal), box.padding = 0.1) +
  geom_text(
    data = annotations,
    aes(
      x = xpos, y = ypos,
      hjust = hjustvar, vjust = vjustvar,
      label = annotateText
    ),
    color = "red"
  ) +
  labs(
    title = "Variance v Average Ratings against top 6",
    subtitle = "At least 5 Appearances",
    x = "Average Ratings",
    y = "Variance"
  ) +
  ylim(-0.1, 3.5)
## Warning: ggrepel: 267 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
<<<<<<< HEAD